// OpentTxl-C Version 11 identifier/token table
// J.R. Cordy, Jan 2023

// Copyright 2023, James R. Cordy and others

// Permission is hereby granted, free of charge, to any person obtaining a copy of this software 
// and associated documentation files (the “Software”), to deal in the Software without restriction, 
// including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
// and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
// subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all copies 
// or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE 
// AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// The identifier (and all terminal tokens) table.
// Define and maintain the hash table for identifiers and other tokens.
// Although it's called the identifier table, all tokens are now hashed into this table.

// Modification Log

// v11.0 Initial revision, adapted from OpenTxl 11.0

// I/O, strings, memory allocation
#include "support.h"

// Global modules
#include "locale.h"
#include "limits.h"
#include "options.h"
#include "tokens.h"
#include "errors.h"
#include "trees.h"
#include "shared.h"
#include "charset.h"
#include "idents.h"
#include "symbols.h"

// Check interface consistency
#include "idents.h"

// The identifier hash table - index is the hash code, entry is the address of the text in identText
// Allocated dynamically
array (longstring *, ident_idents);

// Token kind of the identifier
array (enum treeKindT, ident_identKind);

// Shared tree optimization - all instances of an identifier share one tree node
array (treePT, ident_identTree);
int ident_nIdents;

// The text of identifiers in the hash table, stored as consecutive strings 
// Allocated dynamically
array (unsigned char, ident_identText);
int ident_nIdentChars;

// New superfast hash function
unsigned int ident_hash (const longstring s)
{
    unsigned int h = lstringlen (s);
    unsigned int j = h - 1;
    if (h > 0) {
        {
            const int iend = h >> 1;
            for (int i = 0; i <= iend; i++) { 
                h += (h << 1) + (unsigned char) s[i];
                h += (h << 1) + (unsigned char) s[j];
                j -= 1;
            }
        }
    }
    return (h);
}

// Choose an appropriate secondary hash - critical to performance!
// By choosing the largest prime modulo table size, we are certain to hit every table element
int ident_secondaryHash;
#define NPRIMES 10
// 1-origin [1 .. NPRIMES]
const int ident_primes[NPRIMES + 1] = 
    {UNUSED, 1021, 2027, 4091, 8191, 16381, 32003, 65003, 131009, 262007, 524047};

tokenT ident_lookup (const string ident)
{
    // Begin with the hash, modulo table size
    unsigned int identIndex = ident_hash (ident);
    identIndex &= maxIdents - 1;
    const unsigned int startIndex = identIndex;

    while (true) {
        // If we hit the nil identifier, it isn't here
        if (ident_idents[identIndex] == nilIdent) {
            return (NOT_FOUND);
        }

        // Is this one it?
        if (stringcmp (*ident_idents[identIndex], ident) == 0) break;

        // Nope, try secondary hash
        identIndex += ident_secondaryHash;
        identIndex &= maxIdents - 1;

        // If we've searched the whole table, then it isn't in there
        if (identIndex == startIndex) {
            return (NOT_FOUND);
        }
    }

    return (identIndex);
}

tokenT ident_install (const string ident, const enum treeKindT kind)
{
    // Begin with the hash, modulo table size
    unsigned int identIndex = ident_hash (ident);
    identIndex &= maxIdents - 1;
    const unsigned int startIndex = identIndex;

    while (true) {
        // Is this an empty slot? If so, install it there
        if (ident_idents[identIndex] == nilIdent) {
            // Check for room to install its text
            const int lengthid = lstringlen (ident);

            if (ident_nIdentChars + lengthid + 1 > maxIdentChars) {
                string message;
                stringprintf (message, "Input too large (total text of unique tokens > %d chars) (a larger size is required for this input)", maxIdentChars);
                error ("", message, LIMIT_FATAL, 101);
            }

            // OK, store it
            lstringcpy (* (longstring *) &(ident_identText[ident_nIdentChars - 1]), ident);
            ident_idents[identIndex] = (longstring *) &(ident_identText[ident_nIdentChars - 1]);
            ident_identKind[identIndex] = kind; // default - may be changed later
            ident_nIdentChars += lengthid + 1;
            ident_nIdents++;
            break;
        }

        // If this one's it, then it's already in the table (or we just installed it above)
        if (lstringcmp (*ident_idents[identIndex], ident) == 0) break;

        // Use the secondary hash to find the next slot
        identIndex += ident_secondaryHash;
        identIndex &= maxIdents - 1;

        // If we don't find it or an empty slot for it, then we're out of table space
        if (identIndex == startIndex) {
            string message;
            stringprintf (message, "Input too large (total number of unique tokens > %d) (a larger size is required for this input)", maxIdents);
        }
    }

    // Shared tree optimization - we use the same tree node for every instance of the same identifier
    if (ident_identTree[identIndex] == nilTree) {
        ident_identTree[identIndex] = tree_newTreeInit (kind, identIndex, identIndex, 0, nilKid);
    }

    return (identIndex);
}

// We may discover that an identifier is of a different token kind later
void ident_setKind (const tokenT identIndex, const enum treeKindT kind)
{
    ident_identKind[identIndex] = kind;
    tree_setKind (ident_identTree[identIndex], kind);
}

// Initialization
void ident (void) {
    // Dynamic allocations
    arrayalloc (maxIdents, longstring *, ident_idents);
    arrayalloc (maxIdents, enum treeKindT, ident_identKind);
    arrayalloc (maxIdents, treePT, ident_identTree);
    ident_nIdents = 0;

    arrayalloc ((maxIdentChars + maxStringLength + 1), unsigned char, ident_identText);
    ident_nIdentChars = 1;

    // Initialize the table
    for (int i = 0; i <= maxIdents - 1; i++) {
        ident_idents[i] = nilIdent;
        ident_identKind[i] = treeKind_literal;     // will be set by scanner when actually used
        ident_identTree[i] = nilTree;
    }

    // Need one character as a placeholder for the null token
    lstringcpy (* (longstring *) &(ident_identText[0]), " ");
    ident_nIdentChars += 2;

    // Ident 0 is the null token, with no characters in it
    ident_idents[0] = (longstring *) &(ident_identText[1]);
    ident_identKind[0] = treeKind_literal;
    ident_identTree[0] = nilTree;
    ident_nIdents = 1;

    // Choose an appropriate secondary hash - critical to performance!
    // By choosing the largest prime modulo table size, we are certain to hit every table element
    ident_secondaryHash = 11;
    for (int i = 1; i <= NPRIMES; i++) {
        if (ident_primes[i] > maxIdents) break;
        ident_secondaryHash = ident_primes[i];
    }
}
